From b70d3abc763d8ee495879346a380ca75388584f4 Mon Sep 17 00:00:00 2001
From: oliskoli
Date: Tue, 24 Jun 2008 22:41:03 +0000
Subject: [PATCH] cet, cet_util: Add some new functions.

---
 cet.c      | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 cet.h      |  12 +++++-
 cet_util.c |   9 +++-
 cet_util.h |   3 +-
 4 files changed, 139 insertions(+), 5 deletions(-)

diff --git a/cet.c b/cet.c
index e003e57c5..2df913314 100644
--- a/cet.c
+++ b/cet.c
@@ -2,7 +2,7 @@
 
     Character encoding transformation - basics
 
-    Copyright (C) 2005 Olaf Klein, o.b.klein@gpsbabel.org
+    Copyright (C) 2005-2008 Olaf Klein, o.b.klein@gpsbabel.org
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -262,6 +262,81 @@ cet_utf8_to_char(const char *str, const cet_cs_vec_t *vec, /* out */ int *bytes,
 	return cet_ucs4_to_char(v, vec);
 }
 
+/* =========================================================================== */
+/* %%%                  UTF-8 string manipulation functions                %%% */
+/* =========================================================================== */
+
+/* %%% cet_utf8_strlen %%%
+ *
+ * Returns the number of valid (visible) characters.
+ */
+int
+cet_utf8_strlen(const char *str)
+{
+	if (str) {
+		const char *cin = str;
+		int len = 0;
+
+		while (*cin) {
+			int bytes, value;
+			if (CET_SUCCESS == cet_utf8_to_ucs4(cin, &bytes, &value)) len++;
+			cin += bytes;
+		}
+		return len;
+	}
+	else
+		return 0;
+}
+
+/* %%% cet_utf8_strdup %%%
+ *
+ * Checks and duplicates an UTF-8 string
+ */
+char *
+cet_utf8_strdup(const char *str)
+{
+	if (str)
+		return cet_utf8_strndup(str, strlen(str));
+	else
+		return NULL;
+}
+
+/* %%% cet_utf8_strndup %%%
+ *
+ * Checks and duplicates at most maxlen characters of an UTF-8 string
+ */
+char *
+cet_utf8_strndup(const char *str, const int maxlen)
+{
+	if (str) {
+		const char *cin = str;
+		char *res, *cout;
+		int len = 0;
+
+		res = cout = xstrdup(cin);
+
+		while (*cin && (len < maxlen)) {
+			int bytes, value;
+			if (CET_SUCCESS == cet_utf8_to_ucs4(cin, &bytes, &value)) {
+				cout += cet_ucs4_to_utf8(cout, 6, value);
+				len += 1;
+			}
+			cin += bytes;
+		}
+		*cout = '\0';
+
+		if ((cin - str) != (cout - res)) {
+			cout = xstrdup(res);
+			xfree(res);
+			res = cout;
+		}
+
+		return res;
+	}
+	else
+		return NULL;
+}
+
 /* =========================================================================== */
 /* %%%              full string transformation                             %%% */
 /* =========================================================================== */
@@ -364,3 +439,46 @@ cet_str_uni_to_utf8(const short *src, const int length)
 
 	return res;
 }
+
+/* %%% cet_str_any_to_uni %%%
+ *
+ * Converts a string in given character set to a 'wide string' (unicode).
+ *
+ * Returns a buffer of 2 * (len + 1) bytes obtained from xcalloc; the element
+ * after the last character is set to 0 and the caller has to release the
+ * buffer. If length is not NULL, it receives the number of characters.
+ */
+short *
+cet_str_any_to_uni(const char *src, const cet_cs_vec_t *vec, int *length)
+{
+	char *utf8;
+	int len;
+	short *res, *sout;
+
+	if (vec->ucs4_count == 0) utf8 = cet_utf8_strdup(src);	/* UTF-8 -> clean UTF-8 */
+	else utf8 = cet_str_any_to_utf8(src, vec);
+
+	len = cet_utf8_strlen(utf8);
+	res = sout = xcalloc(2, len + 1);
+
+	if (len) {
+		char *cin = utf8;
+
+		while (*cin) {
+			int bytes, value;
+			if (CET_SUCCESS == cet_utf8_to_ucs4(cin, &bytes, &value)) {
+				le_write16(sout, value);
+				sout++;
+			}
+			cin += bytes;
+		}
+	}
+
+	*sout = 0;
+	if (length) *length = len;
+
+	/* utf8 is only an intermediate copy (NULL for a NULL src); don't leak it */
+	if (utf8) xfree(utf8);
+
+	return res;
+}
diff --git a/cet.h b/cet.h
index 6b32f1edc..aabec07b2 100644
--- a/cet.h
+++ b/cet.h
@@ -2,7 +2,7 @@
 
     Character encoding transformation - basics header
 
-    Copyright (C) 2005 Olaf Klein, o.b.klein@gpsbabel.org
+    Copyright (C) 2005-2008 Olaf Klein, o.b.klein@gpsbabel.org
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -68,4 +68,14 @@ char *cet_str_any_to_utf8(const char *src, const cet_cs_vec_t *vec);
 
 char *cet_str_uni_to_utf8(const short *src, const int length);
 
+/* UTF-8 string manipulation functions */
+
+int cet_utf8_strlen(const char *str);
+char *cet_utf8_strdup(const char *str);
+char *cet_utf8_strndup(const char *str, const int maxlen);
+
+/* unicode functions */
+
+short *cet_str_any_to_uni(const char *src, const cet_cs_vec_t *vec, int *length);
+
 #endif
diff --git a/cet_util.c b/cet_util.c
index d8fd3930f..f393b9112 100644
--- a/cet_util.c
+++ b/cet_util.c
@@ -2,7 +2,7 @@
 
     Character encoding transformation - utilities
 
-    Copyright (C) 2005,2006,2007 Olaf Klein, o.b.klein@gpsbabel.org
+    Copyright (C) 2005-2008 Olaf Klein, o.b.klein@gpsbabel.org
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -106,6 +106,11 @@ cet_str_cp1252_to_utf8(const char *src)
 	return cet_str_any_to_utf8(src, &cet_cs_vec_cp1252);
 }
 
+short *
+cet_str_utf8_to_uni(const char *src, int *length)
+{
+	return cet_str_any_to_uni(src, &cet_cs_vec_utf8, length);
+}
 
 /* helpers */
 
@@ -1115,7 +1120,7 @@ cet_disp_character_set_names(FILE *fout)
 
 	ac = 0;
 
-	fprintf(fout, "GPSbabel builtin character sets: (-c option)\n");
+	fprintf(fout, "GPSBabel builtin character sets: (-c option)\n");
 
 	for (i = 0; i < c; i++) {
 		char **a;
diff --git a/cet_util.h b/cet_util.h
index 9a010a459..389418157 100644
--- a/cet_util.h
+++ b/cet_util.h
@@ -2,7 +2,7 @@
 
     Character encoding transformation - utilities header
 
-    Copyright (C) 2005 Olaf Klein, o.b.klein@gpsbabel.org
+    Copyright (C) 2005-2008 Olaf Klein, o.b.klein@gpsbabel.org
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -56,6 +56,7 @@ char *cet_str_utf8_to_us_ascii(const char *src);
 char *cet_str_us_ascii_to_utf8(const char *src);
 
 extern cet_cs_vec_t cet_cs_vec_ansi_x3_4_1968;
+short *cet_str_utf8_to_uni(const char *src, int *length);
 
 extern cet_cs_vec_t cet_cs_vec_utf8;
 
-- 
2.30.2